# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
# http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The minimum supported CMake stays at 3.0. The "...3.5" suffix raises the
# policy version to 3.5 when running under CMake >= 3.12, which is needed
# because CMake 4.x refuses policy versions older than 3.5. CMake < 3.12
# ignores the suffix and keeps basing policies on 3.0.
CMAKE_MINIMUM_REQUIRED(VERSION 3.0...3.5)

PROJECT(ARM_DNN_LIBRARY C CXX ASM)
# Provides the CMAKE_INSTALL_* directory variables used by the install rules.
INCLUDE(GNUInstallDirs)

# Language levels: C99 and C++11 (C++11 is mandatory), compiler extensions off.
SET(CMAKE_C_STANDARD 99)
SET(CMAKE_C_EXTENSIONS NO)
SET(CMAKE_CXX_STANDARD 11)
SET(CMAKE_CXX_STANDARD_REQUIRED YES)
SET(CMAKE_CXX_EXTENSIONS NO)

# Library flavour to build. "default" lets ADD_LIBRARY() decide (i.e. follows
# BUILD_SHARED_LIBS); the STRINGS property offers the valid choices in GUIs.
SET(ARM_DNN_LIBRARY_LIBRARY_TYPE "default" CACHE STRING "Type of library (shared, static, or default) to build")
SET_PROPERTY(CACHE ARM_DNN_LIBRARY_LIBRARY_TYPE PROPERTY STRINGS default static shared)

# Derive ARM_DNN_LIBRARY_TARGET_PROCESSOR from, in order of precedence:
#   1. CMAKE_OSX_ARCHITECTURES (must be exactly one of arm64/arm64e/arm64_32),
#   2. the Visual Studio generator platform (ARM64 or ARM64EC),
#   3. CMAKE_SYSTEM_PROCESSOR.
# Resulting values: "arm", "arm64", "arm64e", "arm64_32" or "arm64ec".
# Configuration aborts for anything else.
IF(CMAKE_OSX_ARCHITECTURES)
  # Number of entries in the architecture list; the check below does not use it.
  LIST(LENGTH CMAKE_OSX_ARCHITECTURES CMAKE_OSX_ARCHITECTURES_COUNT)
  # The anchored regex only matches a single architecture, so a multi-entry
  # list such as "arm64;arm64e" is rejected here as well.
  IF(NOT CMAKE_OSX_ARCHITECTURES MATCHES "^(arm64|arm64e|arm64_32)$")
    MESSAGE(FATAL_ERROR "Unsupported CMAKE_OSX_ARCHITECTURES value \"${CMAKE_OSX_ARCHITECTURES}\"")
  ENDIF()
  SET(ARM_DNN_LIBRARY_TARGET_PROCESSOR "${CMAKE_OSX_ARCHITECTURES}")
ELSEIF(CMAKE_GENERATOR MATCHES "^Visual Studio " AND CMAKE_GENERATOR_PLATFORM)
  IF(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64")
    SET(ARM_DNN_LIBRARY_TARGET_PROCESSOR "arm64")
  ELSEIF(CMAKE_GENERATOR_PLATFORM STREQUAL "ARM64EC")
    SET(ARM_DNN_LIBRARY_TARGET_PROCESSOR "arm64ec")
  ELSE()
    MESSAGE(FATAL_ERROR "Unsupported Visual Studio architecture \"${CMAKE_GENERATOR_PLATFORM}\"")
  ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]")
  SET(ARM_DNN_LIBRARY_TARGET_PROCESSOR "arm")
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64)$")
  # 64-bit Arm is spelled differently per platform: "aarch64" is the usual
  # Linux/Android value, "arm64" is reported by Apple Silicon hosts when
  # CMAKE_OSX_ARCHITECTURES is not set, and "ARM64" by Windows on Arm hosts
  # using a non-Visual-Studio generator.
  SET(ARM_DNN_LIBRARY_TARGET_PROCESSOR "arm64")
ELSE()
  MESSAGE(FATAL_ERROR "Unsupported CMAKE_SYSTEM_PROCESSOR value \"${CMAKE_SYSTEM_PROCESSOR}\"")
ENDIF()

# Validate the target operating system: it must be set and be one of the
# platforms this project knows about.
IF(CMAKE_SYSTEM_NAME)
  IF(NOT CMAKE_SYSTEM_NAME MATCHES "^(Android|Darwin|iOS|Linux|Windows|CYGWIN|MSYS|QURT|QNX)$")
    MESSAGE(FATAL_ERROR "Unrecognized CMAKE_SYSTEM_NAME value \"${CMAKE_SYSTEM_NAME}\"")
  ENDIF()
ELSE()
  MESSAGE(FATAL_ERROR "CMAKE_SYSTEM_NAME not defined")
ENDIF()

# User-facing build switches. Each enabled switch is later turned into a
# compile definition of the same name.
OPTION(ARM_DNN_LIBRARY_WITH_THREAD_POOL "Build ARM_DNN_LIBRARY with ThreadPool to support multi-threads." OFF)
OPTION(ARM_DNN_LIBRARY_WITH_OMP "Build ARM_DNN_LIBRARY with OpenMP to support multi-threads." ON)
OPTION(ARM_DNN_LIBRARY_WITH_ARM "Build ARM_DNN_LIBRARY with Arm CPU" ON)
OPTION(ARM_DNN_LIBRARY_ARM_WITH_FP16 "Build ARM_DNN_LIBRARY with Arm FP16 kernels" ON)
OPTION(ARM_DNN_LIBRARY_ARM_WITH_BF16 "Build ARM_DNN_LIBRARY with Arm BF16 kernels" ON)
OPTION(ARM_DNN_LIBRARY_ARM_WITH_DOTPROD "Build ARM_DNN_LIBRARY with Arm DotProd (integer dot product) kernels" ON)
OPTION(ARM_DNN_LIBRARY_ARM_WITH_SVE "Build ARM_DNN_LIBRARY with Arm SVE kernels" ON)
OPTION(ARM_DNN_LIBRARY_ARM_WITH_SVE2 "Build ARM_DNN_LIBRARY with Arm SVE2 kernels" ON)

# Enabling the thread pool turns OpenMP off. The SET() creates a normal
# variable that shadows the cache entry for the rest of this configure run.
# The warning is only raised when OpenMP was actually switched off here.
IF(ARM_DNN_LIBRARY_WITH_THREAD_POOL)
  ADD_DEFINITIONS(-DARM_DNN_LIBRARY_WITH_THREAD_POOL)
  IF(ARM_DNN_LIBRARY_WITH_OMP)
    SET(ARM_DNN_LIBRARY_WITH_OMP OFF)
    MESSAGE(WARNING "Disable ARM_DNN_LIBRARY_WITH_OMP when ARM_DNN_LIBRARY_WITH_THREAD_POOL is enabled.")
  ENDIF()
ENDIF()
# OpenMP is always disabled when targeting QNX.
IF(CMAKE_SYSTEM_NAME MATCHES "QNX")
  IF(ARM_DNN_LIBRARY_WITH_OMP)
    SET(ARM_DNN_LIBRARY_WITH_OMP OFF)
    MESSAGE(WARNING "Disable ARM_DNN_LIBRARY_WITH_OMP when OS is QNX.")
  ENDIF()
ENDIF()
# OpenMP support: locate OpenMP, add its compile flags to the global C/C++
# flags, append the link flags to executable and shared-library links, and
# define ARM_DNN_LIBRARY_WITH_OMP for the sources.
IF(ARM_DNN_LIBRARY_WITH_OMP)
  FIND_PACKAGE(OpenMP REQUIRED)
  # Guard clause: stop unless the package or its C++ component was found.
  IF(NOT (OpenMP_FOUND OR OpenMP_CXX_FOUND))
    MESSAGE(FATAL_ERROR "OpenMP not found!")
    RETURN()
  ENDIF()

  # Link flags are only chosen explicitly for Android NDK builds; NDK majors
  # above 20 additionally get -static-openmp.
  IF(ANDROID_NDK_MAJOR)
    SET(OpenMP_LINK_FLAGS "-fopenmp")
    IF(ANDROID_NDK_MAJOR GREATER 20)
      SET(OpenMP_LINK_FLAGS "${OpenMP_LINK_FLAGS} -static-openmp")
    ENDIF()
  ENDIF()

  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_LINK_FLAGS}")
  SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_LINK_FLAGS}")

  # Report what was detected and which flags are in effect.
  MESSAGE(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
  MESSAGE(STATUS "OpenMP C flags:  ${OpenMP_C_FLAGS}")
  MESSAGE(STATUS "OpenMP CXX flags:  ${OpenMP_CXX_FLAGS}")
  MESSAGE(STATUS "OpenMP LINK flags:  ${OpenMP_LINK_FLAGS}")
  MESSAGE(STATUS "OpenMP OpenMP_CXX_LIB_NAMES:  ${OpenMP_CXX_LIB_NAMES}")
  MESSAGE(STATUS "OpenMP OpenMP_CXX_LIBRARIES:  ${OpenMP_CXX_LIBRARIES}")

  ADD_DEFINITIONS(-DARM_DNN_LIBRARY_WITH_OMP)
ENDIF()
# Mirror every enabled feature switch into a preprocessor definition of the
# same name (ARM_DNN_LIBRARY_<feature>), in the order listed.
FOREACH(_arm_dnn_feature
    WITH_ARM
    ARM_WITH_FP16
    ARM_WITH_BF16
    ARM_WITH_DOTPROD
    ARM_WITH_SVE
    ARM_WITH_SVE2)
  IF(ARM_DNN_LIBRARY_${_arm_dnn_feature})
    ADD_DEFINITIONS(-DARM_DNN_LIBRARY_${_arm_dnn_feature})
  ENDIF()
ENDFOREACH()

# Android-specific setup and toolchain sanity checks.
IF(CMAKE_SYSTEM_NAME MATCHES "Android")
  # Pass -llog (the Android logging library) via the C++ flags.
  SET(CMAKE_CXX_FLAGS "-llog ${CMAKE_CXX_FLAGS}")
  # FP16 kernels need a recent enough NDK. The checks are skipped when the NDK
  # major version is unknown.
  IF(ARM_DNN_LIBRARY_ARM_WITH_FP16 AND ANDROID_NDK_MAJOR)
    # 32-bit Arm: NDK r22 or newer is required for the armv7 fp16 intrinsics.
    IF(ARM_DNN_LIBRARY_TARGET_PROCESSOR STREQUAL "arm" AND ANDROID_NDK_MAJOR LESS 22)
      MESSAGE(FATAL_ERROR "Arm v7+fp16 is only supported by ndk r22 or later, but receive ndk r${ANDROID_NDK_MAJOR} .")
    ENDIF()
    # 64-bit Arm: NDK r18 or newer is required.
    IF(ARM_DNN_LIBRARY_TARGET_PROCESSOR MATCHES "^arm64" AND ANDROID_NDK_MAJOR LESS 18)
      MESSAGE(FATAL_ERROR "Arm v8+fp16 is only supported by ndk r18 or later, but receive ndk r${ANDROID_NDK_MAJOR} .")
    ENDIF()
  ENDIF()
ENDIF()

# Header search paths for every target in this directory: the public headers
# first, then the private source tree.
INCLUDE_DIRECTORIES(include src)

# Operator front-ends.
SET(OP_SRCS
  src/operators/relu/op.cc
  src/operators/concat/op.cc
)

# Kernel sources that are always built. The per-architecture groups below are
# appended to this list later in this file.
SET(KERNEL_SRCS
  src/operators/relu/kernels.cc
  src/operators/concat/kernels.cc
)

# Kernel sources grouped by the Arm architecture/extension named in the
# variable. A SET() with no value leaves the variable unset, so an empty group
# expands to nothing wherever it is used.

# --- 32-bit Arm (ARMv7-A) groups ---
SET(ARMV7A_NEON_KERNEL_SRCS
  src/operators/relu/codegen/fp32_aarch32_neon_x8.cc
)
SET(ARMV7A_NEON_VFPV4_KERNEL_SRCS)
# Note: this file is also listed in ARMV82A_FP16_NEON_KERNEL_SRCS.
SET(ARMV7A_NEON_FP16_KERNEL_SRCS
  src/operators/relu/fp16_neon_x32.cc
)

# --- ARMv8-A group ---
SET(ARMV8A_NEON_KERNEL_SRCS
  src/operators/relu/codegen/fp32_aarch64_neon_x16.cc
)

# --- ARMv8.2-A extension groups ---
SET(ARMV82A_FP16_NEON_KERNEL_SRCS
  src/operators/relu/fp16_neon_x32.cc
)
SET(ARMV82A_BF16_NEON_KERNEL_SRCS)
SET(ARMV82A_DOTPROD_NEON_KERNEL_SRCS)
SET(ARMV82A_FP16_DOTPROD_NEON_KERNEL_SRCS)
SET(ARMV82A_BF16_DOTPROD_NEON_KERNEL_SRCS)

# 32-bit Arm: choose which kernel groups are compiled into the library.
IF(ARM_DNN_LIBRARY_TARGET_PROCESSOR STREQUAL "arm")
  # Baseline ARMv7-A groups are always included.
  # NOTE(review): the ARMV7A_NEON_FP16 group is added regardless of
  # ARM_DNN_LIBRARY_ARM_WITH_FP16 - confirm this is intended.
  LIST(APPEND KERNEL_SRCS
    ${ARMV7A_NEON_KERNEL_SRCS}
    ${ARMV7A_NEON_VFPV4_KERNEL_SRCS}
    ${ARMV7A_NEON_FP16_KERNEL_SRCS})

  # Optional ARMv8.2-A extension groups, gated by the feature switches.
  IF(ARM_DNN_LIBRARY_ARM_WITH_FP16)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_FP16_NEON_KERNEL_SRCS})
  ENDIF()
  IF(ARM_DNN_LIBRARY_ARM_WITH_FP16 AND ARM_DNN_LIBRARY_ARM_WITH_DOTPROD)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_FP16_DOTPROD_NEON_KERNEL_SRCS})
  ENDIF()
  IF(ARM_DNN_LIBRARY_ARM_WITH_BF16)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_BF16_NEON_KERNEL_SRCS})
  ENDIF()
  IF(ARM_DNN_LIBRARY_ARM_WITH_BF16 AND ARM_DNN_LIBRARY_ARM_WITH_DOTPROD)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_BF16_DOTPROD_NEON_KERNEL_SRCS})
  ENDIF()
  IF(ARM_DNN_LIBRARY_ARM_WITH_DOTPROD)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_DOTPROD_NEON_KERNEL_SRCS})
  ENDIF()
ENDIF()

# 64-bit Arm (arm64, arm64e, arm64_32, arm64ec): choose which kernel groups
# are compiled into the library.
IF(ARM_DNN_LIBRARY_TARGET_PROCESSOR MATCHES "^arm64")
  # Baseline ARMv8-A NEON kernels are always included.
  LIST(APPEND KERNEL_SRCS ${ARMV8A_NEON_KERNEL_SRCS})
  # Optional ARMv8.2-A extension groups, gated by the feature switches.
  IF(ARM_DNN_LIBRARY_ARM_WITH_FP16)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_FP16_NEON_KERNEL_SRCS})
    IF(ARM_DNN_LIBRARY_ARM_WITH_DOTPROD)
      LIST(APPEND KERNEL_SRCS ${ARMV82A_FP16_DOTPROD_NEON_KERNEL_SRCS})
    ENDIF()
  ENDIF()
  IF(ARM_DNN_LIBRARY_ARM_WITH_BF16)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_BF16_NEON_KERNEL_SRCS})
    IF(ARM_DNN_LIBRARY_ARM_WITH_DOTPROD)
      # BF16 + DotProd kernels (this branch used to append the FP16 + DotProd
      # group a second time, so BF16 + DotProd sources were never built).
      LIST(APPEND KERNEL_SRCS ${ARMV82A_BF16_DOTPROD_NEON_KERNEL_SRCS})
    ENDIF()
  ENDIF()
  IF(ARM_DNN_LIBRARY_ARM_WITH_DOTPROD)
    LIST(APPEND KERNEL_SRCS ${ARMV82A_DOTPROD_NEON_KERNEL_SRCS})
  ENDIF()
ENDIF()

# 32-bit Arm: per-source compile flags. Flags are appended to each source's
# COMPILE_FLAGS string, so the order of the calls below is significant for a
# file that belongs to more than one group.
IF(ARM_DNN_LIBRARY_TARGET_PROCESSOR STREQUAL "arm")
  # Append <flags> to the COMPILE_FLAGS property of every source passed after
  # it. Calling it with no sources changes nothing.
  FUNCTION(ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS flags)
    SET_PROPERTY(SOURCE ${ARGN} APPEND_STRING PROPERTY COMPILE_FLAGS "${flags}")
  ENDFUNCTION()

  # Every kernel is built in Arm (not Thumb) mode.
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -marm " ${KERNEL_SRCS})

  # Architecture / FPU selection per kernel group.
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv7-a -mfpu=neon " ${ARMV7A_NEON_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv7-a -mfpu=neon-vfpv4 " ${ARMV7A_NEON_VFPV4_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv7-a -mfpu=neon-fp16 " ${ARMV7A_NEON_FP16_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv8-a -mfpu=neon-fp-armv8 " ${ARMV8A_NEON_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 " ${ARMV82A_FP16_NEON_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv8.2-a+bf16 -mfpu=neon-fp-armv8 " ${ARMV82A_BF16_NEON_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv8.2-a+dotprod -mfpu=neon-fp-armv8 " ${ARMV82A_DOTPROD_NEON_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv8.2-a+fp16+dotprod -mfpu=neon-fp-armv8 " ${ARMV82A_FP16_DOTPROD_NEON_KERNEL_SRCS})
  ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -march=armv8.2-a+bf16+dotprod -mfpu=neon-fp-armv8 " ${ARMV82A_BF16_DOTPROD_NEON_KERNEL_SRCS})

  # Android NDK majors below 21 additionally get the softfp float ABI.
  IF(ANDROID_NDK_MAJOR AND ANDROID_NDK_MAJOR LESS 21)
    ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -mfloat-abi=softfp " ${KERNEL_SRCS})
  ENDIF()

  # With GCC (compiler id GNU), the FP16 groups also get -mfp16-format=ieee.
  IF(CMAKE_C_COMPILER_ID STREQUAL "GNU")
    ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -mfp16-format=ieee " ${ARMV7A_NEON_FP16_KERNEL_SRCS})
    ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -mfp16-format=ieee " ${ARMV82A_FP16_NEON_KERNEL_SRCS})
    ARM_DNN_LIBRARY_APPEND_SOURCE_FLAGS(" -mfp16-format=ieee " ${ARMV82A_FP16_DOTPROD_NEON_KERNEL_SRCS})
  ENDIF()
ENDIF()

# 64-bit Arm with a non-MSVC compiler: every ARMv8.2-A extension group is
# compiled with " -march=armv8.2-a+<ext> ". The group variable name is derived
# from the extension string: upper-case it and turn '+' into '_', e.g.
# "fp16+dotprod" -> ARMV82A_FP16_DOTPROD_NEON_KERNEL_SRCS.
IF(ARM_DNN_LIBRARY_TARGET_PROCESSOR MATCHES "^arm64" AND NOT MSVC)
  FOREACH(_arm_dnn_ext fp16 bf16 dotprod fp16+dotprod bf16+dotprod)
    STRING(TOUPPER "${_arm_dnn_ext}" _arm_dnn_group)
    STRING(REPLACE "+" "_" _arm_dnn_group "${_arm_dnn_group}")
    SET_PROPERTY(
      SOURCE ${ARMV82A_${_arm_dnn_group}_NEON_KERNEL_SRCS}
      APPEND_STRING PROPERTY COMPILE_FLAGS " -march=armv8.2-a+${_arm_dnn_ext} ")
  ENDFOREACH()
ENDIF()

# Full source list of the library: core/runtime/utility sources followed by
# the selected kernels and the operator front-ends.
SET(ALL_SRCS
  src/core/types.cc
  src/utilities/logging.cc
  src/utilities/cpu_info.cc
  src/utilities/thread_pool.cc
  src/runtime/device.cc
  src/runtime/context.cc
  ${KERNEL_SRCS}
  ${OP_SRCS}
)

# Translate ARM_DNN_LIBRARY_LIBRARY_TYPE into the ADD_LIBRARY() type keyword.
# "default" passes no keyword, which lets CMake pick the type.
IF(ARM_DNN_LIBRARY_LIBRARY_TYPE STREQUAL "default")
  SET(_arm_dnn_library_linkage "")
ELSEIF(ARM_DNN_LIBRARY_LIBRARY_TYPE STREQUAL "shared")
  SET(_arm_dnn_library_linkage SHARED)
ELSEIF(ARM_DNN_LIBRARY_LIBRARY_TYPE STREQUAL "static")
  SET(_arm_dnn_library_linkage STATIC)
ELSE()
  MESSAGE(FATAL_ERROR "Unsupported library type \"${ARM_DNN_LIBRARY_LIBRARY_TYPE}\", should be \"static\", \"shared\", or \"default\".")
ENDIF()
ADD_LIBRARY(arm_dnn_library ${_arm_dnn_library_linkage} ${ALL_SRCS})

# Install the library and its public headers into the GNUInstallDirs
# locations.
#   RUNTIME - the .dll of a shared build on DLL platforms (Windows, CYGWIN,
#             MSYS). CMake older than 3.14 rejects a shared library target on
#             those platforms when no RUNTIME destination is given.
#   LIBRARY - shared libraries on non-DLL platforms.
#   ARCHIVE - static libraries and DLL import libraries.
INSTALL(TARGETS arm_dnn_library
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
INSTALL(DIRECTORY include/arm_dnn_library DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
